In [1]:
import pandas as pd
import numpy as np
import statsmodels.api as sm 
from linearmodels import PanelOLS
from linearmodels import RandomEffects
from scipy import stats
import statsmodels.formula.api as statf
import plotly.express as px
import seaborn as sns
from linearmodels.panel import compare 

from tqdm.notebook import tqdm
from IPython import display as ICD

from patsy import dmatrices
from statsmodels.stats.outliers_influence import variance_inflation_factor
from statsmodels.stats.diagnostic import het_white, het_breuschpagan
In [2]:
# Initialise plotly's offline notebook mode so that figures created later can
# be rendered inside the notebook.
# NOTE(review): this import sits apart from the main import cell; consider
# moving it there so all dependencies are visible in one place.
import plotly.offline as pyo
pyo.init_notebook_mode()

Связь между HDI и I-desi¶

Предобработка и очистка данных¶

Скачиваем iso код для стран¶

In [3]:
# ISO 3166 lookup table: country name, alpha-2 / alpha-3 codes and regions.
# pandas' default NA parsing would turn a literal "NA" code (e.g. Namibia's
# alpha-2 code) into NaN, so only empty fields are treated as missing here.
countries_code = pd.read_csv("all.csv", keep_default_na=False, na_values=[""])
countries_code
Out[3]:
name alpha-2 alpha-3 country-code iso_3166-2 region sub-region intermediate-region region-code sub-region-code intermediate-region-code
0 Afghanistan AF AFG 4 ISO 3166-2:AF Asia Southern Asia NaN 142.0 34.0 NaN
1 Åland Islands AX ALA 248 ISO 3166-2:AX Europe Northern Europe NaN 150.0 154.0 NaN
2 Albania AL ALB 8 ISO 3166-2:AL Europe Southern Europe NaN 150.0 39.0 NaN
3 Algeria DZ DZA 12 ISO 3166-2:DZ Africa Northern Africa NaN 2.0 15.0 NaN
4 American Samoa AS ASM 16 ISO 3166-2:AS Oceania Polynesia NaN 9.0 61.0 NaN
... ... ... ... ... ... ... ... ... ... ... ...
244 Wallis and Futuna WF WLF 876 ISO 3166-2:WF Oceania Polynesia NaN 9.0 61.0 NaN
245 Western Sahara EH ESH 732 ISO 3166-2:EH Africa Northern Africa NaN 2.0 15.0 NaN
246 Yemen YE YEM 887 ISO 3166-2:YE Asia Western Asia NaN 142.0 145.0 NaN
247 Zambia ZM ZMB 894 ISO 3166-2:ZM Africa Sub-Saharan Africa Eastern Africa 2.0 202.0 14.0
248 Zimbabwe ZW ZWE 716 ISO 3166-2:ZW Africa Sub-Saharan Africa Eastern Africa 2.0 202.0 14.0

249 rows × 11 columns

Скачиваем итоговый рейтинг за период по индексу I-desi¶

In [4]:
# Overall DESI score per country and year.
# Written as a single non-mutating chain (no inplace rename, no .loc write into
# a filtered slice) so it cannot raise SettingWithCopyWarning and always starts
# from the file contents when the cell is re-run.
desi_total = (
    pd.read_csv("desi_total-data (2).csv")
    .rename(columns={'value': 'total_score_desi'})
    # Drop the EU aggregate row; only individual countries are analysed.
    .loc[lambda df: df['country'] != 'EU']
    # The data uses "EL" for Greece; recode to the ISO alpha-2 code "GR" so it
    # can be joined with the ISO lookup table.
    .assign(country=lambda df: df['country'].replace('EL', 'GR'))
)
desi_total
Out[4]:
period country indicator breakdown unit total_score_desi flags
0 2022 AT desi_total desi_total pc_desi 54.675671 NaN
1 2022 BE desi_total desi_total pc_desi 50.307388 NaN
2 2022 BG desi_total desi_total pc_desi 37.679882 NaN
3 2022 CY desi_total desi_total pc_desi 48.352205 NaN
4 2022 CZ desi_total desi_total pc_desi 49.143522 NaN
... ... ... ... ... ... ... ...
162 2017 PT desi_total desi_total pc_desi 35.478987 NaN
163 2017 RO desi_total desi_total pc_desi 19.399117 NaN
164 2017 SE desi_total desi_total pc_desi 45.711845 NaN
165 2017 SI desi_total desi_total pc_desi 35.702736 NaN
166 2017 SK desi_total desi_total pc_desi 29.783805 NaN

162 rows × 7 columns

Скачиваем показатели по 4 субиндексам I-desi за период¶

In [5]:
# DESI sub-index scores: one row per period, country and breakdown.
desi_1 = pd.read_csv("desi-data.csv", sep=',', encoding='ISO-8859-1')
# Recode Greece from "EL" to the ISO alpha-2 code "GR"; all other values stay as they are.
desi_1['country'] = desi_1['country'].mask(desi_1['country'].eq("EL"), "GR")
desi_1
Out[5]:
period country indicator breakdown unit value flags
0 2022 AT desi desi_hc pc_desi 12.738011 NaN
1 2022 BE desi desi_hc pc_desi 12.172748 NaN
2 2022 BG desi desi_hc pc_desi 8.147561 NaN
3 2022 CY desi desi_hc pc_desi 10.441309 NaN
4 2022 CZ desi desi_hc pc_desi 11.397470 NaN
... ... ... ... ... ... ... ...
667 2017 RO desi desi_dps pc_desi 1.853090 NaN
668 2017 SE desi desi_dps pc_desi 14.785592 NaN
669 2017 SI desi desi_dps pc_desi 11.599579 NaN
670 2017 SK desi desi_dps pc_desi 9.072821 NaN
671 2017 EU desi desi_dps pc_desi 11.675391 NaN

672 rows × 7 columns

In [6]:
# Enrich the sub-index rows with ISO codes / country names and with the overall
# DESI score. Both joins are left joins, so every sub-index row is kept even
# when there is no match (e.g. the EU aggregate).
iso_lookup = countries_code[["alpha-3", "alpha-2", 'name']]
total_scores = desi_total[['country', 'period', "total_score_desi"]]
desi_1 = (
    desi_1
    .merge(iso_lookup, how='left', left_on='country', right_on='alpha-2')
    .merge(total_scores, how='left', on=['country', 'period'])
)
desi_1
Out[6]:
period country indicator breakdown unit value flags alpha-3 alpha-2 name total_score_desi
0 2022 AT desi desi_hc pc_desi 12.738011 NaN AUT AT Austria 54.675671
1 2022 BE desi desi_hc pc_desi 12.172748 NaN BEL BE Belgium 50.307388
2 2022 BG desi desi_hc pc_desi 8.147561 NaN BGR BG Bulgaria 37.679882
3 2022 CY desi desi_hc pc_desi 10.441309 NaN CYP CY Cyprus 48.352205
4 2022 CZ desi desi_hc pc_desi 11.397470 NaN CZE CZ Czechia 49.143522
... ... ... ... ... ... ... ... ... ... ... ...
667 2017 RO desi desi_dps pc_desi 1.853090 NaN ROU RO Romania 19.399117
668 2017 SE desi desi_dps pc_desi 14.785592 NaN SWE SE Sweden 45.711845
669 2017 SI desi desi_dps pc_desi 11.599579 NaN SVN SI Slovenia 35.702736
670 2017 SK desi desi_dps pc_desi 9.072821 NaN SVK SK Slovakia 29.783805
671 2017 EU desi desi_dps pc_desi 11.675391 NaN NaN NaN NaN NaN

672 rows × 11 columns

Скачиваем показатели за период по индексу HDI. Через цикл переносим года в строки, а показатели субиндекса остаются столбцами¶

In [7]:
# Columns to keep from the HDR file: the country code plus one column per
# (index, year) pair, grouped by index and ordered by year.
hdi_columns = ["iso3"] + [
    f'{stub}_{yr}'
    for stub in ('hdi', 'le', 'eys', 'mys', 'gnipc')
    for yr in range(2017, 2023)
]
hdi_1 = pd.read_csv("HDR23-24_Composite_indices_complete_time_series (2).csv", sep=',', encoding='ISO-8859-1')
hdi_1 = hdi_1[hdi_columns]
hdi_1
Out[7]:
iso3 hdi_2017 hdi_2018 hdi_2019 hdi_2020 hdi_2021 hdi_2022 le_2017 le_2018 le_2019 ... mys_2019 mys_2020 mys_2021 mys_2022 gnipc_2017 gnipc_2018 gnipc_2019 gnipc_2020 gnipc_2021 gnipc_2022
0 AFG 0.485 0.486 0.492 0.488 0.473 0.462 63.016000 63.081000 63.565000 ... 2.698150 2.841610 2.985070 2.514790 2123.676477 2082.113799 2112.986381 1986.803772 1534.135377 1335.205733
1 ALB 0.796 0.797 0.800 0.784 0.785 0.789 79.047000 79.184000 79.282000 ... 10.072996 10.121144 10.121144 10.121144 12802.175940 13302.733030 13481.971800 13069.127700 14399.780470 15293.326510
2 DZA 0.738 0.740 0.742 0.730 0.740 0.745 75.743000 76.066000 76.474000 ... 7.020270 6.987444 6.987444 6.987444 11633.272120 11438.083580 11353.521810 10634.883980 10823.118130 10978.405710
3 AND 0.860 0.863 0.865 0.843 0.855 0.884 82.980000 82.992000 83.004000 ... 11.084770 11.260993 11.437216 11.613440 54906.348230 54904.290060 55035.017860 48026.390490 51128.347500 54233.449480
4 AGO 0.597 0.598 0.597 0.594 0.590 0.591 61.680000 62.144000 62.448000 ... 5.734512 5.844292 5.844292 5.844292 6772.546424 6310.063799 5918.032996 5487.669515 5332.948130 5327.788251
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
201 ZZG.ECA 0.793 0.798 0.802 0.792 0.797 0.802 74.343178 74.505838 74.700669 ... 10.556794 10.626553 10.618775 10.609179 17282.309560 17776.029860 18014.099520 18036.728660 19214.929590 19762.981770
202 ZZH.LAC 0.764 0.766 0.768 0.755 0.754 0.763 74.705783 74.823396 75.014843 ... 8.860465 8.969176 8.973933 8.967833 15198.560840 15131.255810 14985.581440 13848.678100 14646.857190 15109.406150
203 ZZI.SA 0.631 0.632 0.635 0.635 0.631 0.641 70.245298 70.517337 70.722512 ... 6.358383 6.582805 6.612892 6.629607 6197.721643 6460.624904 6605.112625 6237.998412 6638.663046 6971.625949
204 ZZJ.SSA 0.540 0.544 0.549 0.547 0.546 0.549 60.348740 60.735554 61.120022 ... 5.898415 5.886779 5.989415 5.975957 3688.769761 3695.077956 3715.500559 3570.227167 3611.615300 3666.202926
205 ZZK.WORLD 0.732 0.735 0.739 0.736 0.735 0.739 72.568987 72.816178 73.012121 ... 8.574383 8.686374 8.682265 8.661598 16060.191190 16431.285550 16713.676430 16015.845830 16837.841800 17254.434040

206 rows × 31 columns

In [8]:
years_of_interest = [2017, 2018, 2019, 2020, 2021, 2022]
indices_of_interest = ['hdi', 'le', 'eys', 'mys', 'gnipc']

# Skeleton of the long-format HDI table: one row per (country, year).
# Each country code is repeated once per year of interest; the repeat count is
# derived from years_of_interest so the two cannot get out of sync.
hdi_1_nonflat = (
    pd.DataFrame({'iso3': hdi_1['iso3'].tolist() * len(years_of_interest)})
    .sort_values(['iso3'])
    # After sorting, the rows of each country are contiguous, so the repeated
    # year list (assigned positionally) gives every country each year once.
    .assign(year=years_of_interest * hdi_1.shape[0])
)
# Placeholder columns for the index values, filled in a later cell.
hdi_1_nonflat[indices_of_interest] = None
hdi_1_nonflat = hdi_1_nonflat.reset_index(drop=True)
hdi_1_nonflat
Out[8]:
iso3 year hdi le eys mys gnipc
0 AFG 2017 None None None None None
1 AFG 2018 None None None None None
2 AFG 2019 None None None None None
3 AFG 2020 None None None None None
4 AFG 2021 None None None None None
... ... ... ... ... ... ... ...
1231 ZZK.WORLD 2018 None None None None None
1232 ZZK.WORLD 2019 None None None None None
1233 ZZK.WORLD 2020 None None None None None
1234 ZZK.WORLD 2021 None None None None None
1235 ZZK.WORLD 2022 None None None None None

1236 rows × 7 columns

In [9]:
# Sanity check: count how many distinct years each country has, then tabulate
# those counts (every country is expected to have all years of interest).
years_per_country = hdi_1_nonflat.groupby('iso3')['year'].nunique()
years_per_country.value_counts()
Out[9]:
year
6    206
Name: count, dtype: int64
In [10]:
# Fill the index columns by reshaping the wide HDR table (one <index>_<year>
# column per pair) to long format once and joining it on (iso3, year), instead
# of assigning cell by cell for every index / year / country combination.
hdi_long = pd.wide_to_long(
    hdi_1.drop_duplicates('iso3'),  # if a code appears twice, the first row wins
    stubnames=indices_of_interest,
    i='iso3',
    j='year',
    sep='_',
).reset_index()

# Left join on the (iso3, year) skeleton keeps its row order and row count.
# The value columns come out as numeric (float) rather than object dtype.
hdi_1_nonflat = hdi_1_nonflat[['iso3', 'year']].merge(
    hdi_long[['iso3', 'year'] + indices_of_interest],
    on=['iso3', 'year'],
    how='left',
    validate='many_to_one',
)
In [11]:
# Display the filled long-format HDI table (one row per iso3 / year).
hdi_1_nonflat
Out[11]:
iso3 year hdi le eys mys gnipc
0 AFG 2017 0.485 63.016 10.519565 2.41123 2123.676477
1 AFG 2018 0.486 63.081 10.53786 2.55469 2082.113799
2 AFG 2019 0.492 63.565 10.621292 2.69815 2112.986381
3 AFG 2020 0.488 62.575 10.705385 2.84161 1986.803772
4 AFG 2021 0.473 61.982 10.705385 2.98507 1534.135377
... ... ... ... ... ... ... ...
1231 ZZK.WORLD 2018 0.735 72.816178 12.651182 8.483187 16431.28555
1232 ZZK.WORLD 2019 0.739 73.012121 12.726141 8.574383 16713.67643
1233 ZZK.WORLD 2020 0.736 72.257355 12.860674 8.686374 16015.84583
1234 ZZK.WORLD 2021 0.735 71.365524 12.91184 8.682265 16837.8418
1235 ZZK.WORLD 2022 0.739 72.00407 12.989766 8.661598 17254.43404

1236 rows × 7 columns

Скачиваем данные для контрольных переменных (уровень безработицы, расходы на образование, расходы на здравоохранение)¶

In [12]:
# Control variable: unemployment rate per country (iso2) and year.
# Single non-mutating chain (no inplace rename and no .loc write into a column
# slice), which avoids SettingWithCopyWarning and keeps the cell re-runnable.
unemployment = (
    pd.read_csv("Unemployment.csv", sep=',')
    .loc[:, ["geo", "TIME_PERIOD", "OBS_VALUE"]]
    .rename(columns={'geo': 'iso2', "OBS_VALUE": "unemployment"})
    # The data uses "EL" for Greece; recode to the ISO alpha-2 code "GR".
    .assign(iso2=lambda df: df['iso2'].replace('EL', 'GR'))
)
unemployment
Out[12]:
iso2 TIME_PERIOD unemployment
0 AT 2017 5.9
1 AT 2018 5.2
2 AT 2019 4.8
3 AT 2020 6.0
4 AT 2021 6.2
... ... ... ...
157 SK 2018 6.5
158 SK 2019 5.7
159 SK 2020 6.7
160 SK 2021 6.8
161 SK 2022 6.1

162 rows × 3 columns

In [13]:
# Control variable: education expenditure per country (iso2) and year.
# Single non-mutating chain (no inplace rename and no .loc write into a column
# slice), which avoids SettingWithCopyWarning and keeps the cell re-runnable.
education = (
    pd.read_csv("education.csv", sep=',')
    .loc[:, ["geo", "TIME_PERIOD", "OBS_VALUE"]]
    .rename(columns={'geo': 'iso2', "OBS_VALUE": "education"})
    # The data uses "EL" for Greece; recode to the ISO alpha-2 code "GR".
    .assign(iso2=lambda df: df['iso2'].replace('EL', 'GR'))
)
education
Out[13]:
iso2 TIME_PERIOD education
0 AT 2017 4.8
1 AT 2018 4.8
2 AT 2019 4.8
3 AT 2020 5.1
4 AT 2021 4.9
... ... ... ...
175 SK 2018 3.9
176 SK 2019 4.2
177 SK 2020 4.4
178 SK 2021 4.3
179 SK 2022 4.5

180 rows × 3 columns

In [14]:
# Control variable: health expenditure per country (iso2) and year.
# Single non-mutating chain (no inplace rename and no .loc write into a column
# slice), which avoids SettingWithCopyWarning and keeps the cell re-runnable.
health = (
    pd.read_csv("health.csv", sep=',')
    .loc[:, ["geo", "TIME_PERIOD", "OBS_VALUE"]]
    .rename(columns={'geo': 'iso2', "OBS_VALUE": "health"})
    # The data uses "EL" for Greece; recode to the ISO alpha-2 code "GR".
    .assign(iso2=lambda df: df['iso2'].replace('EL', 'GR'))
)
health
Out[14]:
iso2 TIME_PERIOD health
0 AT 2017 8.2
1 AT 2018 8.2
2 AT 2019 8.3
3 AT 2020 9.2
4 AT 2021 10.1
... ... ... ...
175 SK 2018 5.5
176 SK 2019 5.7
177 SK 2020 6.1
178 SK 2021 6.9
179 SK 2022 6.4

180 rows × 3 columns

Создаем общий датафрейм, в который входят показатели HDI и I-desi по странам ЕС за 2017-2022¶

In [15]:
# Combine DESI rows with the long-format HDI table, matching the alpha-3 code
# and period on the DESI side with iso3 and year on the HDI side. The left
# join keeps DESI rows that have no HDI match.
desi_hdi_df = pd.merge(
    desi_1,
    hdi_1_nonflat,
    how='left',
    left_on=['alpha-3', 'period'],
    right_on=['iso3', 'year'],
)
desi_hdi_df
Out[15]:
period country indicator breakdown unit value flags alpha-3 alpha-2 name total_score_desi iso3 year hdi le eys mys gnipc
0 2022 AT desi desi_hc pc_desi 12.738011 NaN AUT AT Austria 54.675671 AUT 2022.0 0.926 82.412 16.36746 12.305714 56529.66329
1 2022 BE desi desi_hc pc_desi 12.172748 NaN BEL BE Belgium 50.307388 BEL 2022.0 0.942 82.293 18.94574 12.528578 53644.03854
2 2022 BG desi desi_hc pc_desi 8.147561 NaN BGR BG Bulgaria 37.679882 BGR 2022.0 0.799 71.528 13.86803 11.41318 25920.80375
3 2022 CY desi desi_hc pc_desi 10.441309 NaN CYP CY Cyprus 48.352205 CYP 2022.0 0.907 81.889 16.24309 12.44017 40136.89453
4 2022 CZ desi desi_hc pc_desi 11.397470 NaN CZE CZ Czechia 49.143522 CZE 2022.0 0.895 78.129 16.347281 12.916053 39944.66682
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
667 2017 RO desi desi_dps pc_desi 1.853090 NaN ROU RO Romania 19.399117 ROU 2017.0 0.823 75.952 14.23019 11.14865 26557.75639
668 2017 SE desi desi_dps pc_desi 14.785592 NaN SWE SE Sweden 45.711845 SWE 2017.0 0.941 82.438 18.864201 12.48605 52868.77901
669 2017 SI desi desi_dps pc_desi 11.599579 NaN SVN SI Slovenia 35.702736 SVN 2017.0 0.912 81.071 17.47266 12.70029 35849.55565
670 2017 SK desi desi_dps pc_desi 9.072821 NaN SVK SK Slovakia 29.783805 SVK 2017.0 0.857 77.219 14.59997 12.76618 29599.14794
671 2017 EU desi desi_dps pc_desi 11.675391 NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN NaN

672 rows × 18 columns

In [16]:
# Remove the EU aggregate rows and the helper / duplicate key columns that are
# no longer needed after the joins.
unused_columns = ['unit', 'flags', "alpha-3", "alpha-2", "indicator", "year"]
desi_hdi_df = (
    desi_hdi_df
    .loc[desi_hdi_df['country'] != 'EU']
    .drop(columns=unused_columns)
)
desi_hdi_df
Out[16]:
period country breakdown value name total_score_desi iso3 hdi le eys mys gnipc
0 2022 AT desi_hc 12.738011 Austria 54.675671 AUT 0.926 82.412 16.36746 12.305714 56529.66329
1 2022 BE desi_hc 12.172748 Belgium 50.307388 BEL 0.942 82.293 18.94574 12.528578 53644.03854
2 2022 BG desi_hc 8.147561 Bulgaria 37.679882 BGR 0.799 71.528 13.86803 11.41318 25920.80375
3 2022 CY desi_hc 10.441309 Cyprus 48.352205 CYP 0.907 81.889 16.24309 12.44017 40136.89453
4 2022 CZ desi_hc 11.397470 Czechia 49.143522 CZE 0.895 78.129 16.347281 12.916053 39944.66682
... ... ... ... ... ... ... ... ... ... ... ... ...
666 2017 PT desi_dps 12.385897 Portugal 35.478987 PRT 0.857 81.498 16.3622 9.21823 32269.04155
667 2017 RO desi_dps 1.853090 Romania 19.399117 ROU 0.823 75.952 14.23019 11.14865 26557.75639
668 2017 SE desi_dps 14.785592 Sweden 45.711845 SWE 0.941 82.438 18.864201 12.48605 52868.77901
669 2017 SI desi_dps 11.599579 Slovenia 35.702736 SVN 0.912 81.071 17.47266 12.70029 35849.55565
670 2017 SK desi_dps 9.072821 Slovakia 29.783805 SVK 0.857 77.219 14.59997 12.76618 29599.14794

648 rows × 12 columns

In [17]:
# Make sure the HDI index columns are numeric (float) before analysis.
desi_hdi_df = desi_hdi_df.astype({column: float for column in indices_of_interest})
In [18]:
# List the distinct DESI sub-index codes present in the combined table.
desi_hdi_df['breakdown'].unique()
Out[18]:
array(['desi_hc', 'desi_conn', 'desi_idt', 'desi_dps'], dtype=object)

Добавление контроля в датафрейм¶

In [19]:
# Attach the unemployment control by country code and year (left join keeps
# every existing row).
desi_hdi_df = pd.merge(
    desi_hdi_df,
    unemployment,
    how='left',
    left_on=['country', 'period'],
    right_on=['iso2', 'TIME_PERIOD'],
)
desi_hdi_df
Out[19]:
period country breakdown value name total_score_desi iso3 hdi le eys mys gnipc iso2 TIME_PERIOD unemployment
0 2022 AT desi_hc 12.738011 Austria 54.675671 AUT 0.926 82.412 16.367460 12.305714 56529.66329 AT 2022 4.8
1 2022 BE desi_hc 12.172748 Belgium 50.307388 BEL 0.942 82.293 18.945740 12.528578 53644.03854 BE 2022 5.6
2 2022 BG desi_hc 8.147561 Bulgaria 37.679882 BGR 0.799 71.528 13.868030 11.413180 25920.80375 BG 2022 4.2
3 2022 CY desi_hc 10.441309 Cyprus 48.352205 CYP 0.907 81.889 16.243090 12.440170 40136.89453 CY 2022 6.8
4 2022 CZ desi_hc 11.397470 Czechia 49.143522 CZE 0.895 78.129 16.347281 12.916053 39944.66682 CZ 2022 2.2
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
643 2017 PT desi_dps 12.385897 Portugal 35.478987 PRT 0.857 81.498 16.362200 9.218230 32269.04155 PT 2017 9.2
644 2017 RO desi_dps 1.853090 Romania 19.399117 ROU 0.823 75.952 14.230190 11.148650 26557.75639 RO 2017 6.1
645 2017 SE desi_dps 14.785592 Sweden 45.711845 SWE 0.941 82.438 18.864201 12.486050 52868.77901 SE 2017 6.8
646 2017 SI desi_dps 11.599579 Slovenia 35.702736 SVN 0.912 81.071 17.472660 12.700290 35849.55565 SI 2017 6.6
647 2017 SK desi_dps 9.072821 Slovakia 29.783805 SVK 0.857 77.219 14.599970 12.766180 29599.14794 SK 2017 8.1

648 rows × 15 columns

In [20]:
# Attach the education and health controls with the same join keys.
# Each control table brings its own iso2 / TIME_PERIOD key columns; where those
# names already exist in the frame, pandas disambiguates them with _x / _y
# suffixes.
control_join = dict(how='left', left_on=['country', 'period'], right_on=['iso2', 'TIME_PERIOD'])
desi_hdi_df = (
    desi_hdi_df
    .merge(education, **control_join)
    .merge(health, **control_join)
)
In [21]:
# Drop the join-key columns left behind by the three control-variable merges.
merge_key_leftovers = ['iso2_x', 'TIME_PERIOD_x', "iso2_y", "TIME_PERIOD_y", "iso2", "TIME_PERIOD"]
desi_hdi_df = desi_hdi_df.drop(columns=merge_key_leftovers)
desi_hdi_df
Out[21]:
period country breakdown value name total_score_desi iso3 hdi le eys mys gnipc unemployment education health
0 2022 AT desi_hc 12.738011 Austria 54.675671 AUT 0.926 82.412 16.367460 12.305714 56529.66329 4.8 4.8 9.3
1 2022 BE desi_hc 12.172748 Belgium 50.307388 BEL 0.942 82.293 18.945740 12.528578 53644.03854 5.6 6.3 8.1
2 2022 BG desi_hc 8.147561 Bulgaria 37.679882 BGR 0.799 71.528 13.868030 11.413180 25920.80375 4.2 3.9 5.6
3 2022 CY desi_hc 10.441309 Cyprus 48.352205 CYP 0.907 81.889 16.243090 12.440170 40136.89453 6.8 5.1 6.2
4 2022 CZ desi_hc 11.397470 Czechia 49.143522 CZE 0.895 78.129 16.347281 12.916053 39944.66682 2.2 4.9 9.1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
643 2017 PT desi_dps 12.385897 Portugal 35.478987 PRT 0.857 81.498 16.362200 9.218230 32269.04155 9.2 4.6 6.2
644 2017 RO desi_dps 1.853090 Romania 19.399117 ROU 0.823 75.952 14.230190 11.148650 26557.75639 6.1 2.9 4.4
645 2017 SE desi_dps 14.785592 Sweden 45.711845 SWE 0.941 82.438 18.864201 12.486050 52868.77901 6.8 6.7 6.8
646 2017 SI desi_dps 11.599579 Slovenia 35.702736 SVN 0.912 81.071 17.472660 12.700290 35849.55565 6.6 5.4 6.6
647 2017 SK desi_dps 9.072821 Slovakia 29.783805 SVK 0.857 77.219 14.599970 12.766180 29599.14794 8.1 3.9 5.5

648 rows × 15 columns

Разведочный анализ данных с помощью визуализации¶

In [22]:
def _subindex_rows(frame, code):
    """Return the rows of `frame` for one DESI breakdown, with `value` renamed to `code`."""
    return frame.loc[frame['breakdown'] == code].rename(columns={'value': code})


# One frame per DESI sub-index, each carrying its score in a column named after it.
desi_hc = _subindex_rows(desi_hdi_df, "desi_hc")
desi_conn = _subindex_rows(desi_hdi_df, "desi_conn")
desi_idt = _subindex_rows(desi_hdi_df, "desi_idt")
desi_dps = _subindex_rows(desi_hdi_df, "desi_dps")

# Go from long to wide: start from the human-capital rows (which keep all the
# HDI and control columns) and add the other three sub-index scores as columns,
# matching on period and country code.
merge_keys = ['period', 'iso3']
final_df = desi_hc
for sub_name, sub_frame in (('desi_conn', desi_conn), ('desi_idt', desi_idt), ('desi_dps', desi_dps)):
    final_df = final_df.merge(sub_frame[[sub_name] + merge_keys], on=merge_keys)
final_df
Out[22]:
period country breakdown desi_hc name total_score_desi iso3 hdi le eys mys gnipc unemployment education health desi_conn desi_idt desi_dps
0 2022 AT desi_hc 12.738011 Austria 54.675671 AUT 0.926 82.412 16.367460 12.305714 56529.66329 4.8 4.8 9.3 14.116365 9.791828 18.029467
1 2022 BE desi_hc 12.172748 Belgium 50.307388 BEL 0.942 82.293 18.945740 12.528578 53644.03854 5.6 6.3 8.1 9.956698 11.989702 16.188240
2 2022 BG desi_hc 8.147561 Bulgaria 37.679882 BGR 0.799 71.528 13.868030 11.413180 25920.80375 4.2 3.9 5.6 12.675763 3.882440 12.974119
3 2022 CY desi_hc 10.441309 Cyprus 48.352205 CYP 0.907 81.889 16.243090 12.440170 40136.89453 6.8 5.1 6.2 14.694059 8.836923 14.379914
4 2022 CZ desi_hc 11.397470 Czechia 49.143522 CZE 0.895 78.129 16.347281 12.916053 39944.66682 2.2 4.9 9.1 13.172476 8.459601 16.113975
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
157 2017 PT desi_hc 9.731173 Portugal 35.478987 PRT 0.857 81.498 16.362200 9.218230 32269.04155 9.2 4.6 6.2 6.734946 6.626971 12.385897
158 2017 RO desi_hc 6.973170 Romania 19.399117 ROU 0.823 75.952 14.230190 11.148650 26557.75639 6.1 2.9 4.4 7.893893 2.678963 1.853090
159 2017 SE desi_hc 13.499455 Sweden 45.711845 SWE 0.941 82.438 18.864201 12.486050 52868.77901 6.8 6.7 6.8 8.987967 8.438830 14.785592
160 2017 SI desi_hc 10.102175 Slovenia 35.702736 SVN 0.912 81.071 17.472660 12.700290 35849.55565 6.6 5.4 6.6 7.561989 6.438993 11.599579
161 2017 SK desi_hc 9.404006 Slovakia 29.783805 SVK 0.857 77.219 14.599970 12.766180 29599.14794 8.1 3.9 5.5 6.512537 4.794442 9.072821

162 rows × 18 columns

In [23]:
# HDI against the Human capital sub-index, labelled by country, with an OLS trend line.
fig = px.scatter(
    data_frame=final_df,
    x='desi_hc',
    y='hdi',
    text='country',
    hover_data=['iso3', 'name', 'period'],
    trendline="ols",
    title="Human development index and Human capital(I-desi)",
).update_layout(autosize=False, width=800, height=600)
fig.show()
In [24]:
# HDI against the Connectivity sub-index, labelled by country, with an OLS trend line.
fig = px.scatter(
    data_frame=final_df,
    x='desi_conn',
    y='hdi',
    text='country',
    hover_data=['iso3', 'name', 'period'],
    trendline="ols",
    title="Human development index and Connectivity(I-desi)",
).update_layout(autosize=False, width=800, height=600)
fig.show()
In [25]:
# HDI against the Integration of digital technology sub-index, labelled by
# country, with an OLS trend line.
fig = px.scatter(
    data_frame=final_df,
    x='desi_idt',
    y='hdi',
    text='country',
    hover_data=['iso3', 'name', 'period'],
    trendline="ols",
    title="Human development index and Integration of digital technology(I-desi)",
).update_layout(autosize=False, width=800, height=600)
fig.show()
In [26]:
# HDI against the Digital public services sub-index, labelled by country, with
# an OLS trend line.
fig = px.scatter(
    data_frame=final_df,
    x='desi_dps',
    y='hdi',
    text='country',
    hover_data=['iso3', 'name', 'period'],
    trendline="ols",
    title="Human development index and Digital public services(I-desi)",
).update_layout(autosize=False, width=800, height=600)
fig.show()

Построение модели регрессии между HDI и I-desi¶

In [27]:
# Keep only the identifier columns and the variables used in the regressions.
# NOTE(review): this overwrites final_df and drops 'country', which the scatter
# plots above use, so those plot cells cannot be re-run after this one.
model_columns = [
    'period', 'name', 'iso3',
    'total_score_desi', 'hdi', 'le', 'eys', 'mys', 'gnipc',
    'desi_conn', 'desi_idt', 'desi_dps', 'desi_hc',
    'unemployment', 'education', 'health',
]
final_df = final_df.loc[:, model_columns]
final_df
Out[27]:
period name iso3 total_score_desi hdi le eys mys gnipc desi_conn desi_idt desi_dps desi_hc unemployment education health
0 2022 Austria AUT 54.675671 0.926 82.412 16.367460 12.305714 56529.66329 14.116365 9.791828 18.029467 12.738011 4.8 4.8 9.3
1 2022 Belgium BEL 50.307388 0.942 82.293 18.945740 12.528578 53644.03854 9.956698 11.989702 16.188240 12.172748 5.6 6.3 8.1
2 2022 Bulgaria BGR 37.679882 0.799 71.528 13.868030 11.413180 25920.80375 12.675763 3.882440 12.974119 8.147561 4.2 3.9 5.6
3 2022 Cyprus CYP 48.352205 0.907 81.889 16.243090 12.440170 40136.89453 14.694059 8.836923 14.379914 10.441309 6.8 5.1 6.2
4 2022 Czechia CZE 49.143522 0.895 78.129 16.347281 12.916053 39944.66682 13.172476 8.459601 16.113975 11.397470 2.2 4.9 9.1
... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ... ...
157 2017 Portugal PRT 35.478987 0.857 81.498 16.362200 9.218230 32269.04155 6.734946 6.626971 12.385897 9.731173 9.2 4.6 6.2
158 2017 Romania ROU 19.399117 0.823 75.952 14.230190 11.148650 26557.75639 7.893893 2.678963 1.853090 6.973170 6.1 2.9 4.4
159 2017 Sweden SWE 45.711845 0.941 82.438 18.864201 12.486050 52868.77901 8.987967 8.438830 14.785592 13.499455 6.8 6.7 6.8
160 2017 Slovenia SVN 35.702736 0.912 81.071 17.472660 12.700290 35849.55565 7.561989 6.438993 11.599579 10.102175 6.6 5.4 6.6
161 2017 Slovakia SVK 29.783805 0.857 77.219 14.599970 12.766180 29599.14794 6.512537 4.794442 9.072821 9.404006 8.1 3.9 5.5

162 rows × 16 columns

Наш получившийся датафрейм final_df охватывает период значений индексов за 2017-2022 годы и состоит из:

  • name, iso3 - 27 стран ЕС
  • измерения индекса цифровизации I-desi по 4 субиндексам (каждый весит 25%): Human Capital, Connectivity, Integration of Digital Technology, Digital Public Services:
    • desi_hc субиндекс Human capital
    • desi_conn субиндекс Connectivity
    • desi_idt субиндекс Integration of digital technology
    • desi_dps субиндекс Digital public services
  • total_score_desi - общий индекс I-desi стран за период

В нашем исследовании общий показатель I-desi и каждый из 4 субиндексов будут независимыми переменными.

Каждый из субиндексов измеряется по нескольким параметрам. Но в нашем регрессионном анализе мы не будем уходить на такой уровень детализации.

Все значения нормированы по минимально-максимальному подходу: минимальный фактический показатель умножается на 0.75, максимальный фактический показатель умножается на 1.75. Общее значение шкалы от 0 до 100, где 100 -- лучшее значение.

  • hdi - обобщенный усредненный показатель ИЧР за каждый год периода (с 2017 по 2022)
  • le - ожидаемая продолжительность жизни
  • eys - ожидаемое количество лет обучения
  • mys - среднее количество лет обучения
  • gnipc - ВНД на душу населения (в ценах 2017 года)

Субиндексы HDI в нашем исследовании будут зависимыми переменными.

Контрольные переменные:

  • unemployment - уровень безработицы в %
  • education - гос.расходы на образование в % от всех расходов
  • health - гос.расходы на здравоохранение в % от всех расходов

Так как данные панельные, то мы будем использовать модель OLS для панельных данных

In [28]:
# Panel layout: rows ordered by country name and period, with (name, period) as the index.
panel_keys = ['name', 'period']
final_df_index = final_df.sort_values(by=panel_keys).set_index(keys=panel_keys)

Регрессии всех показателей цифровизации на каждый отдельный субиндекс качества жизни¶

In [29]:
# Pairwise correlations between all numeric model variables, shown as an annotated heatmap.
corr_columns = [
    'total_score_desi', 'hdi', 'le', 'eys', 'mys', 'gnipc',
    'desi_conn', 'desi_idt', 'desi_dps', 'desi_hc',
    'unemployment', 'education', 'health',
]
corr = final_df.loc[:, corr_columns].corr()

sns.set_theme(rc={'figure.figsize': (11.7, 8.27)})
sns.heatmap(data=corr, annot=True)
Out[29]:
<Axes: >
No description has been provided for this image

Регрессии с дамми переменными¶

In [30]:
# Dummy-variable OLS: HDI on the overall DESI score and the three controls,
# with country dummies (Finland as the reference level) and period dummies.
formula_total = (
    'hdi ~ total_score_desi + health + unemployment + education'
    ' + C(name, Treatment("Finland")) + C(period)'
)
model00 = statf.ols(formula_total, data=final_df).fit()
print(model00.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    hdi   R-squared:                       0.994
Model:                            OLS   Adj. R-squared:                  0.993
Method:                 Least Squares   F-statistic:                     614.9
Date:                Thu, 13 Jun 2024   Prob (F-statistic):          6.67e-125
Time:                        10:31:29   Log-Likelihood:                 719.87
No. Observations:                 162   AIC:                            -1368.
Df Residuals:                     126   BIC:                            -1257.
Df Model:                          35                                         
Covariance Type:            nonrobust                                         
================================================================================================================
                                                   coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------------------------
Intercept                                        0.9164      0.015     59.230      0.000       0.886       0.947
C(name, Treatment("Finland"))[T.Austria]        -0.0132      0.004     -3.291      0.001      -0.021      -0.005
C(name, Treatment("Finland"))[T.Belgium]         0.0091      0.004      2.522      0.013       0.002       0.016
C(name, Treatment("Finland"))[T.Bulgaria]       -0.1186      0.007    -16.176      0.000      -0.133      -0.104
C(name, Treatment("Finland"))[T.Croatia]        -0.0612      0.005    -12.437      0.000      -0.071      -0.051
C(name, Treatment("Finland"))[T.Cyprus]         -0.0237      0.005     -4.350      0.000      -0.034      -0.013
C(name, Treatment("Finland"))[T.Czechia]        -0.0361      0.005     -7.129      0.000      -0.046      -0.026
C(name, Treatment("Finland"))[T.Denmark]         0.0088      0.002      3.976      0.000       0.004       0.013
C(name, Treatment("Finland"))[T.Estonia]        -0.0396      0.003    -13.185      0.000      -0.046      -0.034
C(name, Treatment("Finland"))[T.France]         -0.0238      0.004     -5.921      0.000      -0.032      -0.016
C(name, Treatment("Finland"))[T.Germany]         0.0156      0.005      3.263      0.001       0.006       0.025
C(name, Treatment("Finland"))[T.Greece]         -0.0302      0.008     -3.763      0.000      -0.046      -0.014
C(name, Treatment("Finland"))[T.Hungary]        -0.0755      0.006    -12.933      0.000      -0.087      -0.064
C(name, Treatment("Finland"))[T.Ireland]         0.0005      0.005      0.107      0.915      -0.009       0.010
C(name, Treatment("Finland"))[T.Italy]          -0.0306      0.006     -5.243      0.000      -0.042      -0.019
C(name, Treatment("Finland"))[T.Latvia]         -0.0569      0.004    -13.713      0.000      -0.065      -0.049
C(name, Treatment("Finland"))[T.Lithuania]      -0.0511      0.004    -12.435      0.000      -0.059      -0.043
C(name, Treatment("Finland"))[T.Luxembourg]     -0.0137      0.003     -4.252      0.000      -0.020      -0.007
C(name, Treatment("Finland"))[T.Malta]          -0.0306      0.003     -9.753      0.000      -0.037      -0.024
C(name, Treatment("Finland"))[T.Netherlands]     0.0006      0.002      0.256      0.798      -0.004       0.005
C(name, Treatment("Finland"))[T.Poland]         -0.0461      0.006     -7.234      0.000      -0.059      -0.033
C(name, Treatment("Finland"))[T.Portugal]       -0.0676      0.004    -15.648      0.000      -0.076      -0.059
C(name, Treatment("Finland"))[T.Romania]        -0.0937      0.009    -10.636      0.000      -0.111      -0.076
C(name, Treatment("Finland"))[T.Slovakia]       -0.0691      0.006    -11.885      0.000      -0.081      -0.058
C(name, Treatment("Finland"))[T.Slovenia]       -0.0128      0.004     -3.471      0.001      -0.020      -0.005
C(name, Treatment("Finland"))[T.Spain]          -0.0324      0.004     -7.538      0.000      -0.041      -0.024
C(name, Treatment("Finland"))[T.Sweden]          0.0131      0.003      5.192      0.000       0.008       0.018
C(period)[T.2018]                                0.0012      0.001      1.131      0.260      -0.001       0.003
C(period)[T.2019]                                0.0029      0.002      1.896      0.060      -0.000       0.006
C(period)[T.2020]                               -0.0025      0.002     -1.113      0.268      -0.007       0.002
C(period)[T.2021]                               -0.0047      0.003     -1.577      0.117      -0.011       0.001
C(period)[T.2022]                               -0.0042      0.004     -1.076      0.284      -0.012       0.004
total_score_desi                                 0.0008      0.000      3.585      0.000       0.000       0.001
health                                           0.0001      0.001      0.179      0.858      -0.001       0.002
unemployment                                    -0.0004      0.000     -1.191      0.236      -0.001       0.000
education                                       -0.0032      0.002     -2.137      0.035      -0.006      -0.000
==============================================================================
Omnibus:                        2.482   Durbin-Watson:                   2.583
Prob(Omnibus):                  0.289   Jarque-Bera (JB):                2.320
Skew:                           0.089   Prob(JB):                        0.313
Kurtosis:                       3.558   Cond. No.                     4.87e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 4.87e+03. This might indicate that there are
strong multicollinearity or other numerical problems.
In [31]:
# Dummy-variable OLS: HDI on the four DESI sub-indices and the three controls,
# with country dummies (Finland as the reference level) and period dummies.
formula_subindices = (
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc'
    ' + health + unemployment + education'
    ' + C(name, Treatment("Finland")) + C(period)'
)
model01 = statf.ols(formula_subindices, data=final_df).fit()
print(model01.summary())
                            OLS Regression Results                            
==============================================================================
Dep. Variable:                    hdi   R-squared:                       0.995
Model:                            OLS   Adj. R-squared:                  0.993
Method:                 Least Squares   F-statistic:                     641.5
Date:                Thu, 13 Jun 2024   Prob (F-statistic):          1.12e-124
Time:                        10:31:29   Log-Likelihood:                 731.85
No. Observations:                 162   AIC:                            -1386.
Df Residuals:                     123   BIC:                            -1265.
Df Model:                          38                                         
Covariance Type:            nonrobust                                         
================================================================================================================
                                                   coef    std err          t      P>|t|      [0.025      0.975]
----------------------------------------------------------------------------------------------------------------
Intercept                                        0.9280      0.026     35.164      0.000       0.876       0.980
C(name, Treatment("Finland"))[T.Austria]        -0.0137      0.006     -2.223      0.028      -0.026      -0.002
C(name, Treatment("Finland"))[T.Belgium]         0.0004      0.008      0.053      0.958      -0.015       0.016
C(name, Treatment("Finland"))[T.Bulgaria]       -0.1167      0.013     -8.712      0.000      -0.143      -0.090
C(name, Treatment("Finland"))[T.Croatia]        -0.0619      0.009     -6.723      0.000      -0.080      -0.044
C(name, Treatment("Finland"))[T.Cyprus]         -0.0280      0.011     -2.596      0.011      -0.049      -0.007
C(name, Treatment("Finland"))[T.Czechia]        -0.0403      0.009     -4.617      0.000      -0.058      -0.023
C(name, Treatment("Finland"))[T.Denmark]         0.0052      0.004      1.378      0.171      -0.002       0.013
C(name, Treatment("Finland"))[T.Estonia]        -0.0368      0.007     -5.595      0.000      -0.050      -0.024
C(name, Treatment("Finland"))[T.France]         -0.0217      0.007     -3.004      0.003      -0.036      -0.007
C(name, Treatment("Finland"))[T.Germany]         0.0140      0.009      1.602      0.112      -0.003       0.031
C(name, Treatment("Finland"))[T.Greece]         -0.0285      0.015     -1.955      0.053      -0.057       0.000
C(name, Treatment("Finland"))[T.Hungary]        -0.0736      0.011     -6.650      0.000      -0.096      -0.052
C(name, Treatment("Finland"))[T.Ireland]         0.0021      0.005      0.392      0.696      -0.009       0.013
C(name, Treatment("Finland"))[T.Italy]          -0.0357      0.012     -3.008      0.003      -0.059      -0.012
C(name, Treatment("Finland"))[T.Latvia]         -0.0507      0.009     -5.711      0.000      -0.068      -0.033
C(name, Treatment("Finland"))[T.Lithuania]      -0.0543      0.009     -5.833      0.000      -0.073      -0.036
C(name, Treatment("Finland"))[T.Luxembourg]     -0.0074      0.006     -1.320      0.189      -0.018       0.004
C(name, Treatment("Finland"))[T.Malta]          -0.0328      0.006     -5.596      0.000      -0.044      -0.021
C(name, Treatment("Finland"))[T.Netherlands]     0.0001      0.003      0.043      0.966      -0.006       0.007
C(name, Treatment("Finland"))[T.Poland]         -0.0475      0.012     -3.970      0.000      -0.071      -0.024
C(name, Treatment("Finland"))[T.Portugal]       -0.0703      0.009     -8.025      0.000      -0.088      -0.053
C(name, Treatment("Finland"))[T.Romania]        -0.0939      0.018     -5.110      0.000      -0.130      -0.058
C(name, Treatment("Finland"))[T.Slovakia]       -0.0693      0.011     -6.427      0.000      -0.091      -0.048
C(name, Treatment("Finland"))[T.Slovenia]       -0.0173      0.008     -2.111      0.037      -0.034      -0.001
C(name, Treatment("Finland"))[T.Spain]          -0.0258      0.007     -3.614      0.000      -0.040      -0.012
C(name, Treatment("Finland"))[T.Sweden]          0.0109      0.004      2.957      0.004       0.004       0.018
C(period)[T.2018]                                0.0003      0.001      0.203      0.839      -0.002       0.003
C(period)[T.2019]                                0.0017      0.002      0.782      0.436      -0.003       0.006
C(period)[T.2020]                               -0.0039      0.003     -1.204      0.231      -0.010       0.002
C(period)[T.2021]                               -0.0057      0.004     -1.314      0.191      -0.014       0.003
C(period)[T.2022]                               -0.0040      0.005     -0.728      0.468      -0.015       0.007
desi_conn                                     2.348e-05      0.000      0.085      0.933      -0.001       0.001
desi_idt                                         0.0031      0.001      5.149      0.000       0.002       0.004
desi_dps                                         0.0004      0.001      0.367      0.714      -0.002       0.002
desi_hc                                         -0.0008      0.001     -0.690      0.491      -0.003       0.002
health                                           0.0004      0.001      0.490      0.625      -0.001       0.002
unemployment                                    -0.0008      0.000     -2.338      0.021      -0.002      -0.000
education                                       -0.0024      0.001     -1.644      0.103      -0.005       0.000
==============================================================================
Omnibus:                        1.147   Durbin-Watson:                   2.449
Prob(Omnibus):                  0.564   Jarque-Bera (JB):                0.762
Skew:                          -0.061   Prob(JB):                        0.683
Kurtosis:                       3.313   Cond. No.                     5.42e+03
==============================================================================

Notes:
[1] Standard Errors assume that the covariance matrix of the errors is correctly specified.
[2] The condition number is large, 5.42e+03. This might indicate that there are
strong multicollinearity or other numerical problems.

Панельные регрессии с фиксированными эффектами¶

Также после каждой модели проверим качество оценок её коэффициентов по подгруппам стран, чтобы исключить смещение.

In [32]:
# Panel regression of hdi on the aggregate DESI score and the controls, with
# entity and time effects and the default (unadjusted) covariance.
model0 = PanelOLS.from_formula(
    'hdi ~ total_score_desi + EntityEffects + TimeEffects + health  + education + unemployment',
    data=final_df_index,
).fit()
print(model0)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    hdi   R-squared:                        0.1614
Estimator:                   PanelOLS   R-squared (Between):              0.0328
No. Observations:                 162   R-squared (Within):               0.1907
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.0328
Time:                        10:31:30   Log-likelihood                    719.87
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      6.0607
Entities:                          27   P-value                           0.0002
Avg Obs:                       6.0000   Distribution:                   F(4,126)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             6.0607
                                        P-value                           0.0002
Time periods:                       6   Distribution:                   F(4,126)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                                Parameter Estimates                                 
====================================================================================
                  Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------------
total_score_desi     0.0008     0.0002     3.5847     0.0005      0.0003      0.0012
health               0.0001     0.0007     0.1792     0.8581     -0.0013      0.0016
education           -0.0032     0.0015    -2.1366     0.0346     -0.0062     -0.0002
unemployment        -0.0004     0.0003    -1.1907     0.2360     -0.0011      0.0003
====================================================================================

F-test for Poolability: 346.67
P-value: 0.0000
Distribution: F(31,126)

Included effects: Entity, Time
In [33]:
# Same specification as model0; only the covariance changes: standard errors
# are clustered by entity.
model0_robust_entity = PanelOLS.from_formula(
    'hdi ~ total_score_desi + EntityEffects + TimeEffects + health  + education + unemployment',
    data=final_df_index,
).fit(
    cov_type="clustered",
    cluster_entity=True,
)
print(model0_robust_entity)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    hdi   R-squared:                        0.1614
Estimator:                   PanelOLS   R-squared (Between):              0.0328
No. Observations:                 162   R-squared (Within):               0.1907
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.0328
Time:                        10:31:30   Log-likelihood                    719.87
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6.0607
Entities:                          27   P-value                           0.0002
Avg Obs:                       6.0000   Distribution:                   F(4,126)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             2.3205
                                        P-value                           0.0604
Time periods:                       6   Distribution:                   F(4,126)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                                Parameter Estimates                                 
====================================================================================
                  Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------------
total_score_desi     0.0008     0.0004     2.0660     0.0409   3.207e-05      0.0015
health               0.0001     0.0009     0.1393     0.8895     -0.0017      0.0020
education           -0.0032     0.0025    -1.2954     0.1975     -0.0081      0.0017
unemployment        -0.0004     0.0003    -1.2752     0.2046     -0.0010      0.0002
====================================================================================

F-test for Poolability: 346.67
P-value: 0.0000
Distribution: F(31,126)

Included effects: Entity, Time
In [34]:
# Same specification as model0; standard errors are clustered by both entity
# and time.
model0_robust_entity_time = PanelOLS.from_formula(
    'hdi ~ total_score_desi + EntityEffects + TimeEffects + health  + education + unemployment',
    data=final_df_index,
).fit(
    cov_type="clustered",
    cluster_entity=True,
    cluster_time=True,
)
print(model0_robust_entity_time)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    hdi   R-squared:                        0.1614
Estimator:                   PanelOLS   R-squared (Between):              0.0328
No. Observations:                 162   R-squared (Within):               0.1907
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.0328
Time:                        10:31:31   Log-likelihood                    719.87
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6.0607
Entities:                          27   P-value                           0.0002
Avg Obs:                       6.0000   Distribution:                   F(4,126)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             4.3415
                                        P-value                           0.0025
Time periods:                       6   Distribution:                   F(4,126)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                                Parameter Estimates                                 
====================================================================================
                  Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
------------------------------------------------------------------------------------
total_score_desi     0.0008     0.0003     2.3938     0.0181      0.0001      0.0014
health               0.0001     0.0010     0.1293     0.8973     -0.0019      0.0022
education           -0.0032     0.0021    -1.5107     0.1334     -0.0074      0.0010
unemployment        -0.0004     0.0003    -1.6024     0.1116     -0.0009   9.625e-05
====================================================================================

F-test for Poolability: 346.67
P-value: 0.0000
Distribution: F(31,126)

Included effects: Entity, Time
In [35]:
# Side-by-side table of the three model0 fits; p-values are shown under the
# coefficients instead of the default statistic.
print(
    compare(
        {
            "FE-model": model0,
            "Robust-model(entity)": model0_robust_entity,
            "Robust-model(entity_time)": model0_robust_entity_time,
        },
        precision='pvalues',
    )
)
                                   Model Comparison                                  
=====================================================================================
                              FE-model Robust-model(entity) Robust-model(entity_time)
-------------------------------------------------------------------------------------
Dep. Variable                      hdi                  hdi                       hdi
Estimator                     PanelOLS             PanelOLS                  PanelOLS
No. Observations                   162                  162                       162
Cov. Est.                   Unadjusted            Clustered                 Clustered
R-squared                       0.1614               0.1614                    0.1614
R-Squared (Within)              0.1907               0.1907                    0.1907
R-Squared (Between)             0.0328               0.0328                    0.0328
R-Squared (Overall)             0.0328               0.0328                    0.0328
F-statistic                     6.0607               6.0607                    6.0607
P-value (F-stat)                0.0002               0.0002                    0.0002
=====================     ============          ===========               ===========
total_score_desi                0.0008               0.0008                    0.0008
                              (0.0005)             (0.0409)                  (0.0181)
health                          0.0001               0.0001                    0.0001
                              (0.8581)             (0.8895)                  (0.8973)
education                      -0.0032              -0.0032                   -0.0032
                              (0.0346)             (0.1975)                  (0.1334)
unemployment                   -0.0004              -0.0004                   -0.0004
                              (0.2360)             (0.2046)                  (0.1116)
======================= ==============        =============             =============
Effects                         Entity               Entity                    Entity
                                  Time                 Time                      Time
-------------------------------------------------------------------------------------

P-values reported in parentheses
In [36]:
def calc_weightedsum1(check_df):
    """Combine per-country slopes of y_resid on x_resid into one weighted sum.

    For every country (column ``name``) a separate OLS ``y_resid ~ x_resid`` is
    fitted and the coefficient at position 1 of its params (the ``x_resid``
    slope; the formula's intercept comes first) is kept. Each slope is weighted
    by that country's share of ``var(x_resid) * (n - 1)``, where ``n`` is the
    country's row count.

    Parameters
    ----------
    check_df : DataFrame with columns ``name``, ``x_resid`` and ``y_resid``.

    Returns
    -------
    The weighted sum of the per-country slopes. The value is also printed with
    the prefix ``Manual calculation:``.
    """
    # sort=False keeps countries in order of first appearance, which matches
    # the order of .unique() used for the per-country fits below.
    by_country = check_df.groupby('name', sort = False)
    group_sizes = by_country.size()
    within_ss = by_country.x_resid.var() * (group_sizes - 1)

    slopes = [
        statf.ols(
            formula = 'y_resid ~ x_resid',
            data = check_df.loc[check_df['name'] == country,],
        ).fit().params.iloc[1]
        for country in check_df['name'].unique()
    ]

    # list * Series multiplies element-wise by position.
    weightedsum1 = sum(slopes * within_ss / sum(within_ss))
    print('Manual calculation:', weightedsum1)
    return weightedsum1


def check_groups(model, var_to_check, target, other_vars=None):
    """Check one fitted coefficient against a per-country weighted average.

    Both ``target`` and ``var_to_check`` are regressed (OLS on the notebook-level
    ``final_df``) on health, unemployment, education, country dummies with
    Finland as reference, period dummies and any ``other_vars``. The residuals
    of the two fits are passed to ``calc_weightedsum1`` and the result is
    compared with ``model.params[var_to_check]``.

    Parameters
    ----------
    model : fitted model exposing ``params`` indexable by variable name.
    var_to_check : name of the regressor whose coefficient is checked.
    target : name of the dependent variable.
    other_vars : optional iterable of extra regressor names added to both
        auxiliary regressions.

    Raises
    ------
    AssertionError
        If the manual value and the model coefficient differ beyond
        ``np.allclose`` tolerance.
    """
    check_df = final_df.loc[:,['name', 'period']]

    # Right-hand side shared by both auxiliary regressions.
    rhs = 'health + unemployment + education + C(name, Treatment("Finland")) + C(period)'
    if other_vars is not None:
        for var in other_vars:
            rhs += f' + {var}'
    formula_target = f'{target} ~ {rhs}'
    formula_var_x = f'{var_to_check} ~ {rhs}'

    check_df['y_resid'] = statf.ols(formula = formula_target, data = final_df).fit().resid
    check_df['x_resid'] = statf.ols(formula = formula_var_x, data = final_df).fit().resid

    # ANSI escape codes print the variable name in bold.
    print('\033[1m', var_to_check, '\033[0m')
    weightedsum1 = calc_weightedsum1(check_df)
    model_coef = model.params[var_to_check]
    print('Model coeff:', model_coef)
    assert np.allclose(weightedsum1, model_coef), 'Coefficients mismatch'

# Check the aggregate DESI coefficient of model0 against the manual
# per-country weighted average.
var_to_check, target = 'total_score_desi', 'hdi'
check_groups(model0, var_to_check, target)
 total_score_desi 
Manual calculation: 0.0007616990949393254
Model coeff: 0.0007616990949393253
In [37]:
# Panel regression of hdi on the four desi_* components and the controls, with
# entity and time effects and the default (unadjusted) covariance.
model1 = PanelOLS.from_formula(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit()
print(model1)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    hdi   R-squared:                        0.2767
Estimator:                   PanelOLS   R-squared (Between):              0.0101
No. Observations:                 162   R-squared (Within):               0.2732
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.0101
Time:                        10:31:31   Log-likelihood                    731.85
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      6.7219
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             6.7219
                                        P-value                           0.0000
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn     2.348e-05     0.0003     0.0845     0.9328     -0.0005      0.0006
desi_idt         0.0031     0.0006     5.1490     0.0000      0.0019      0.0043
desi_dps         0.0004     0.0010     0.3675     0.7139     -0.0017      0.0025
desi_hc         -0.0008     0.0012    -0.6903     0.4913     -0.0032      0.0016
health           0.0004     0.0007     0.4897     0.6252     -0.0011      0.0018
unemployment    -0.0008     0.0003    -2.3376     0.0210     -0.0015     -0.0001
education       -0.0024     0.0014    -1.6437     0.1028     -0.0052      0.0005
================================================================================

F-test for Poolability: 261.43
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [38]:
# Same specification as model1; standard errors are clustered by entity.
model1_robust_entity = PanelOLS.from_formula(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(
    cov_type="clustered",
    cluster_entity=True,
)
print(model1_robust_entity)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    hdi   R-squared:                        0.2767
Estimator:                   PanelOLS   R-squared (Between):              0.0101
No. Observations:                 162   R-squared (Within):               0.2732
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.0101
Time:                        10:31:32   Log-likelihood                    731.85
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6.7219
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             3.0158
                                        P-value                           0.0059
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn     2.348e-05     0.0003     0.0698     0.9445     -0.0006      0.0007
desi_idt         0.0031     0.0011     2.8151     0.0057      0.0009      0.0053
desi_dps         0.0004     0.0016     0.2373     0.8128     -0.0028      0.0036
desi_hc         -0.0008     0.0019    -0.4473     0.6555     -0.0045      0.0028
health           0.0004     0.0012     0.2998     0.7648     -0.0020      0.0027
unemployment    -0.0008     0.0003    -2.8755     0.0048     -0.0014     -0.0003
education       -0.0024     0.0023    -1.0481     0.2967     -0.0068      0.0021
================================================================================

F-test for Poolability: 261.43
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [39]:
# Same specification as model1; standard errors are clustered by both entity
# and time.
model1_robust_entity_time = PanelOLS.from_formula(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(
    cov_type="clustered",
    cluster_entity=True,
    cluster_time=True,
)
print(model1_robust_entity_time)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    hdi   R-squared:                        0.2767
Estimator:                   PanelOLS   R-squared (Between):              0.0101
No. Observations:                 162   R-squared (Within):               0.2732
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.0101
Time:                        10:31:32   Log-likelihood                    731.85
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      6.7219
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             0.5717
                                        P-value                           0.7778
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn     2.348e-05     0.0003     0.0802     0.9362     -0.0006      0.0006
desi_idt         0.0031     0.0010     3.1935     0.0018      0.0012      0.0050
desi_dps         0.0004     0.0021     0.1874     0.8517     -0.0037      0.0045
desi_hc         -0.0008     0.0017    -0.4807     0.6316     -0.0043      0.0026
health           0.0004     0.0013     0.2697     0.7878     -0.0022      0.0029
unemployment    -0.0008     0.0002    -3.8722     0.0002     -0.0012     -0.0004
education       -0.0024     0.0018    -1.3422     0.1820     -0.0058      0.0011
================================================================================

F-test for Poolability: 261.43
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [40]:
# Side-by-side table of the three model1 fits; p-values are shown under the
# coefficients instead of the default statistic.
print(
    compare(
        {
            "FE-model": model1,
            "Robust-model(entity)": model1_robust_entity,
            "Robust-model(entity_time)": model1_robust_entity_time,
        },
        precision='pvalues',
    )
)
                                   Model Comparison                                   
======================================================================================
                               FE-model Robust-model(entity) Robust-model(entity_time)
--------------------------------------------------------------------------------------
Dep. Variable                       hdi                  hdi                       hdi
Estimator                      PanelOLS             PanelOLS                  PanelOLS
No. Observations                    162                  162                       162
Cov. Est.                    Unadjusted            Clustered                 Clustered
R-squared                        0.2767               0.2767                    0.2767
R-Squared (Within)               0.2732               0.2732                    0.2732
R-Squared (Between)              0.0101               0.0101                    0.0101
R-Squared (Overall)              0.0101               0.0101                    0.0101
F-statistic                      6.7219               6.7219                    6.7219
P-value (F-stat)                 0.0000               0.0000                    0.0000
=====================     =============          ===========               ===========
desi_conn                     2.348e-05            2.348e-05                 2.348e-05
                               (0.9328)             (0.9445)                  (0.9362)
desi_idt                         0.0031               0.0031                    0.0031
                            (1.008e-06)             (0.0057)                  (0.0018)
desi_dps                         0.0004               0.0004                    0.0004
                               (0.7139)             (0.8128)                  (0.8517)
desi_hc                         -0.0008              -0.0008                   -0.0008
                               (0.4913)             (0.6555)                  (0.6316)
health                           0.0004               0.0004                    0.0004
                               (0.6252)             (0.7648)                  (0.7878)
unemployment                    -0.0008              -0.0008                   -0.0008
                               (0.0210)             (0.0048)                  (0.0002)
education                       -0.0024              -0.0024                   -0.0024
                               (0.1028)             (0.2967)                  (0.1820)
======================= ===============        =============             =============
Effects                          Entity               Entity                    Entity
                                   Time                 Time                      Time
--------------------------------------------------------------------------------------

P-values reported in parentheses
In [41]:
# Check each desi_* coefficient of model1 in turn; the remaining three
# components are passed as extra regressors for the auxiliary regressions.
desi_components = ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
target = 'hdi'
for var_to_check in desi_components:
    other_vars = [component for component in desi_components if component != var_to_check]
    check_groups(model1, var_to_check, target, other_vars)
 desi_conn 
Manual calculation: 2.3478822678223248e-05
Model coeff: 2.3478822678212494e-05
 desi_idt 
Manual calculation: 0.003099960566821051
Model coeff: 0.0030999605668210555
 desi_dps 
Manual calculation: 0.00038499008158690855
Model coeff: 0.00038499008158690573
 desi_hc 
Manual calculation: -0.0008308973659444528
Model coeff: -0.0008308973659444756
In [42]:
# Same regressors and effects as model1, with `le` as the dependent variable
# in place of hdi.
model2 = PanelOLS.from_formula(
    'le ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit()
print(model2)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                     le   R-squared:                        0.4150
Estimator:                   PanelOLS   R-squared (Between):              0.1541
No. Observations:                 162   R-squared (Within):              -5.2924
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.1538
Time:                        10:31:34   Log-likelihood                   -78.789
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      12.463
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             12.463
                                        P-value                           0.0000
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0065     0.0414     0.1560     0.8763     -0.0755      0.0884
desi_idt         0.5896     0.0897     6.5724     0.0000      0.4120      0.7671
desi_dps         0.3281     0.1561     2.1017     0.0376      0.0191      0.6371
desi_hc         -0.0967     0.1794    -0.5393     0.5907     -0.4517      0.2583
health           0.0820     0.1075     0.7629     0.4470     -0.1308      0.2948
unemployment    -0.0786     0.0519    -1.5136     0.1327     -0.1814      0.0242
education       -0.3281     0.2140    -1.5332     0.1278     -0.7518      0.0955
================================================================================

F-test for Poolability: 103.37
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [43]:
# Same `le` specification, re-fitted with standard errors clustered by entity.
model2_robust_entity = (
    PanelOLS
    .from_formula(
        'le ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
        data=final_df_index,
    )
    .fit(cov_type="clustered", cluster_entity=True)
)
print(model2_robust_entity)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                     le   R-squared:                        0.4150
Estimator:                   PanelOLS   R-squared (Between):              0.1541
No. Observations:                 162   R-squared (Within):              -5.2924
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.1538
Time:                        10:31:34   Log-likelihood                   -78.789
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      12.463
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             5.3704
                                        P-value                           0.0000
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0065     0.0487     0.1325     0.8948     -0.0900      0.1029
desi_idt         0.5896     0.1286     4.5842     0.0000      0.3350      0.8442
desi_dps         0.3281     0.2093     1.5677     0.1195     -0.0862      0.7423
desi_hc         -0.0967     0.2867    -0.3373     0.7364     -0.6643      0.4708
health           0.0820     0.1564     0.5243     0.6010     -0.2276      0.3917
unemployment    -0.0786     0.0354    -2.2205     0.0282     -0.1487     -0.0085
education       -0.3281     0.2597    -1.2636     0.2088     -0.8421      0.1859
================================================================================

F-test for Poolability: 103.37
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [44]:
# Same `le` specification, re-fitted with standard errors clustered by both
# entity and time.
# NOTE(review): the summary reports only 6 time periods, so the time
# dimension contributes very few clusters -- confirm these SEs are reliable.
two_way_cluster = {"cov_type": "clustered", "cluster_entity": True, "cluster_time": True}
model2_robust_entity_time = PanelOLS.from_formula(
    'le ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(**two_way_cluster)
print(model2_robust_entity_time)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                     le   R-squared:                        0.4150
Estimator:                   PanelOLS   R-squared (Between):              0.1541
No. Observations:                 162   R-squared (Within):              -5.2924
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.1538
Time:                        10:31:34   Log-likelihood                   -78.789
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      12.463
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             14.565
                                        P-value                           0.0000
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0065     0.0542     0.1191     0.9054     -0.1008      0.1138
desi_idt         0.5896     0.1004     5.8713     0.0000      0.3908      0.7883
desi_dps         0.3281     0.2918     1.1242     0.2631     -0.2496      0.9058
desi_hc         -0.0967     0.2637    -0.3668     0.7144     -0.6187      0.4253
health           0.0820     0.1860     0.4410     0.6600     -0.2862      0.4502
unemployment    -0.0786     0.0216    -3.6324     0.0004     -0.1215     -0.0358
education       -0.3281     0.2080    -1.5773     0.1173     -0.7399      0.0837
================================================================================

F-test for Poolability: 103.37
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [45]:
# Side-by-side table of the three `le` fits; p-values are shown under each
# coefficient.
le_fits = {
    "FE-model": model2,
    "Robust-model(entity)": model2_robust_entity,
    "Robust-model(entity-time)": model2_robust_entity_time,
}
print(compare(le_fits, precision='pvalues'))
                                   Model Comparison                                   
======================================================================================
                               FE-model Robust-model(entity) Robust-model(entity-time)
--------------------------------------------------------------------------------------
Dep. Variable                        le                   le                        le
Estimator                      PanelOLS             PanelOLS                  PanelOLS
No. Observations                    162                  162                       162
Cov. Est.                    Unadjusted            Clustered                 Clustered
R-squared                        0.4150               0.4150                    0.4150
R-Squared (Within)              -5.2924              -5.2924                   -5.2924
R-Squared (Between)              0.1541               0.1541                    0.1541
R-Squared (Overall)              0.1538               0.1538                    0.1538
F-statistic                      12.463               12.463                    12.463
P-value (F-stat)                 0.0000               0.0000                    0.0000
=====================     =============        =============             =============
desi_conn                        0.0065               0.0065                    0.0065
                               (0.8763)             (0.8948)                  (0.9054)
desi_idt                         0.5896               0.5896                    0.5896
                            (1.259e-09)          (1.103e-05)               (3.763e-08)
desi_dps                         0.3281               0.3281                    0.3281
                               (0.0376)             (0.1195)                  (0.2631)
desi_hc                         -0.0967              -0.0967                   -0.0967
                               (0.5907)             (0.7364)                  (0.7144)
health                           0.0820               0.0820                    0.0820
                               (0.4470)             (0.6010)                  (0.6600)
unemployment                    -0.0786              -0.0786                   -0.0786
                               (0.1327)             (0.0282)                  (0.0004)
education                       -0.3281              -0.3281                   -0.3281
                               (0.1278)             (0.2088)                  (0.1173)
======================= ===============      ===============           ===============
Effects                          Entity               Entity                    Entity
                                   Time                 Time                      Time
--------------------------------------------------------------------------------------

P-values reported in parentheses
In [46]:
# Cross-check every DESI sub-index coefficient of model2: check_groups prints
# a manual calculation next to the model coefficient (see the cell output).
# For each checked variable the remaining three sub-indices are passed as
# `other_vars`, in their original order.
target = 'le'
desi_parts = ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
for var_to_check in desi_parts:
    other_vars = [part for part in desi_parts if part != var_to_check]
    check_groups(model2, var_to_check, target, other_vars)
 desi_conn 
Manual calculation: 0.006456409482128464
Model coeff: 0.0064564094821282485
 desi_idt 
Manual calculation: 0.5895773771768185
Model coeff: 0.5895773771768196
 desi_dps 
Manual calculation: 0.32808049800709094
Model coeff: 0.32808049800709244
 desi_hc 
Manual calculation: -0.09672010276064617
Model coeff: -0.09672010276064694
In [47]:
# Panel regression of `eys` on the four DESI sub-indices plus controls, with
# entity and time effects; default (unadjusted) covariance.
model3_spec = PanelOLS.from_formula(
    'eys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
)
model3 = model3_spec.fit()
print(model3)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    eys   R-squared:                        0.2304
Estimator:                   PanelOLS   R-squared (Between):             -0.0186
No. Observations:                 162   R-squared (Within):               0.2635
Date:                Thu, Jun 13 2024   R-squared (Overall):             -0.0185
Time:                        10:31:36   Log-likelihood                    72.800
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      5.2602
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             5.2602
                                        P-value                           0.0000
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0281     0.0162     1.7281     0.0865     -0.0041      0.0602
desi_idt         0.0391     0.0352     1.1098     0.2692     -0.0306      0.1087
desi_dps        -0.1117     0.0612    -1.8236     0.0706     -0.2329      0.0095
desi_hc          0.1031     0.0704     1.4657     0.1453     -0.0361      0.2424
health           0.1213     0.0422     2.8762     0.0047      0.0378      0.2048
unemployment    -0.0698     0.0204    -3.4263     0.0008     -0.1102     -0.0295
education       -0.1234     0.0840    -1.4699     0.1441     -0.2896      0.0428
================================================================================

F-test for Poolability: 192.43
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [48]:
# Same `eys` specification, re-fitted with standard errors clustered by entity.
model3_robust_entity = (
    PanelOLS
    .from_formula(
        'eys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
        data=final_df_index,
    )
    .fit(cov_type="clustered", cluster_entity=True)
)
print(model3_robust_entity)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    eys   R-squared:                        0.2304
Estimator:                   PanelOLS   R-squared (Between):             -0.0186
No. Observations:                 162   R-squared (Within):               0.2635
Date:                Thu, Jun 13 2024   R-squared (Overall):             -0.0185
Time:                        10:31:36   Log-likelihood                    72.800
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      5.2602
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             3.6836
                                        P-value                           0.0012
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0281     0.0209     1.3401     0.1827     -0.0134      0.0695
desi_idt         0.0391     0.0651     0.5997     0.5498     -0.0899      0.1680
desi_dps        -0.1117     0.0931    -1.1997     0.2326     -0.2959      0.0726
desi_hc          0.1031     0.0902     1.1427     0.2554     -0.0755      0.2818
health           0.1213     0.0494     2.4565     0.0154      0.0236      0.2191
unemployment    -0.0698     0.0307    -2.2706     0.0249     -0.1307     -0.0090
education       -0.1234     0.1033    -1.1948     0.2345     -0.3279      0.0810
================================================================================

F-test for Poolability: 192.43
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [49]:
# Same `eys` specification, re-fitted with standard errors clustered by both
# entity and time.
# NOTE(review): the summary reports only 6 time periods, so the time
# dimension contributes very few clusters -- confirm these SEs are reliable.
two_way_cluster = {"cov_type": "clustered", "cluster_entity": True, "cluster_time": True}
model3_robust_entity_time = PanelOLS.from_formula(
    'eys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(**two_way_cluster)
print(model3_robust_entity_time)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    eys   R-squared:                        0.2304
Estimator:                   PanelOLS   R-squared (Between):             -0.0186
No. Observations:                 162   R-squared (Within):               0.2635
Date:                Thu, Jun 13 2024   R-squared (Overall):             -0.0185
Time:                        10:31:36   Log-likelihood                    72.800
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      5.2602
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             3.8213
                                        P-value                           0.0009
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0281     0.0167     1.6773     0.0960     -0.0051      0.0612
desi_idt         0.0391     0.0544     0.7178     0.4742     -0.0686      0.1468
desi_dps        -0.1117     0.0820    -1.3615     0.1759     -0.2740      0.0507
desi_hc          0.1031     0.1019     1.0118     0.3136     -0.0986      0.3049
health           0.1213     0.0492     2.4651     0.0151      0.0239      0.2187
unemployment    -0.0698     0.0223    -3.1309     0.0022     -0.1140     -0.0257
education       -0.1234     0.0874    -1.4117     0.1606     -0.2965      0.0496
================================================================================

F-test for Poolability: 192.43
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [50]:
# Side-by-side table of the three `eys` fits; p-values are shown under each
# coefficient.
eys_fits = {
    "FE-model": model3,
    "Robust-model(entity)": model3_robust_entity,
    "Robust-model(entity-time)": model3_robust_entity_time,
}
print(compare(eys_fits, precision='pvalues'))
                                   Model Comparison                                  
=====================================================================================
                              FE-model Robust-model(entity) Robust-model(entity-time)
-------------------------------------------------------------------------------------
Dep. Variable                      eys                  eys                       eys
Estimator                     PanelOLS             PanelOLS                  PanelOLS
No. Observations                   162                  162                       162
Cov. Est.                   Unadjusted            Clustered                 Clustered
R-squared                       0.2304               0.2304                    0.2304
R-Squared (Within)              0.2635               0.2635                    0.2635
R-Squared (Between)            -0.0186              -0.0186                   -0.0186
R-Squared (Overall)            -0.0185              -0.0185                   -0.0185
F-statistic                     5.2602               5.2602                    5.2602
P-value (F-stat)                0.0000               0.0000                    0.0000
=====================     ============          ===========               ===========
desi_conn                       0.0281               0.0281                    0.0281
                              (0.0865)             (0.1827)                  (0.0960)
desi_idt                        0.0391               0.0391                    0.0391
                              (0.2692)             (0.5498)                  (0.4742)
desi_dps                       -0.1117              -0.1117                   -0.1117
                              (0.0706)             (0.2326)                  (0.1759)
desi_hc                         0.1031               0.1031                    0.1031
                              (0.1453)             (0.2554)                  (0.3136)
health                          0.1213               0.1213                    0.1213
                              (0.0047)             (0.0154)                  (0.0151)
unemployment                   -0.0698              -0.0698                   -0.0698
                              (0.0008)             (0.0249)                  (0.0022)
education                      -0.1234              -0.1234                   -0.1234
                              (0.1441)             (0.2345)                  (0.1606)
======================= ==============        =============             =============
Effects                         Entity               Entity                    Entity
                                  Time                 Time                      Time
-------------------------------------------------------------------------------------

P-values reported in parentheses
In [51]:
# Cross-check every DESI sub-index coefficient of model3: check_groups prints
# a manual calculation next to the model coefficient (see the cell output).
# For each checked variable the remaining three sub-indices are passed as
# `other_vars`, in their original order.
target = 'eys'
desi_parts = ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
for var_to_check in desi_parts:
    other_vars = [part for part in desi_parts if part != var_to_check]
    check_groups(model3, var_to_check, target, other_vars)
 desi_conn 
Manual calculation: 0.028057255846445122
Model coeff: 0.028057255846445035
 desi_idt 
Manual calculation: 0.0390558233372044
Model coeff: 0.039055823337204996
 desi_dps 
Manual calculation: -0.11167477699862385
Model coeff: -0.11167477699862337
 desi_hc 
Manual calculation: 0.10312430686054608
Model coeff: 0.10312430686054501
In [52]:
# Panel regression of `mys` on the four DESI sub-indices plus controls, with
# entity and time effects; default (unadjusted) covariance.
model4_spec = PanelOLS.from_formula(
    'mys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
)
model4 = model4_spec.fit()
print(model4)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    mys   R-squared:                        0.3070
Estimator:                   PanelOLS   R-squared (Between):             -0.0209
No. Observations:                 162   R-squared (Within):              -0.2977
Date:                Thu, Jun 13 2024   R-squared (Overall):             -0.0209
Time:                        10:31:37   Log-likelihood                    221.12
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      7.7853
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             7.7853
                                        P-value                           0.0000
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0032     0.0065     0.4910     0.6243     -0.0097      0.0161
desi_idt         0.0153     0.0141     1.0842     0.2804     -0.0126      0.0432
desi_dps        -0.0511     0.0245    -2.0845     0.0392     -0.0996     -0.0026
desi_hc          0.0280     0.0282     0.9934     0.3224     -0.0278      0.0837
health          -0.0104     0.0169    -0.6168     0.5385     -0.0438      0.0230
unemployment    -0.0451     0.0082    -5.5294     0.0000     -0.0612     -0.0290
education        0.1039     0.0336     3.0906     0.0025      0.0373      0.1704
================================================================================

F-test for Poolability: 637.25
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [53]:
# Same `mys` specification, re-fitted with standard errors clustered by entity.
model4_robust_entity = (
    PanelOLS
    .from_formula(
        'mys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
        data=final_df_index,
    )
    .fit(cov_type="clustered", cluster_entity=True)
)
print(model4_robust_entity)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    mys   R-squared:                        0.3070
Estimator:                   PanelOLS   R-squared (Between):             -0.0209
No. Observations:                 162   R-squared (Within):              -0.2977
Date:                Thu, Jun 13 2024   R-squared (Overall):             -0.0209
Time:                        10:31:37   Log-likelihood                    221.12
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      7.7853
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             2.4435
                                        P-value                           0.0222
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0032     0.0070     0.4557     0.6494     -0.0107      0.0171
desi_idt         0.0153     0.0224     0.6827     0.4961     -0.0290      0.0596
desi_dps        -0.0511     0.0363    -1.4065     0.1621     -0.1230      0.0208
desi_hc          0.0280     0.0345     0.8102     0.4194     -0.0404      0.0963
health          -0.0104     0.0179    -0.5829     0.5610     -0.0458      0.0250
unemployment    -0.0451     0.0162    -2.7841     0.0062     -0.0772     -0.0130
education        0.1039     0.0608     1.7093     0.0899     -0.0164      0.2242
================================================================================

F-test for Poolability: 637.25
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [54]:
# Same `mys` specification, re-fitted with standard errors clustered by both
# entity and time.
# NOTE(review): the summary reports only 6 time periods, so the time
# dimension contributes very few clusters -- confirm these SEs are reliable.
two_way_cluster = {"cov_type": "clustered", "cluster_entity": True, "cluster_time": True}
model4_robust_entity_time = PanelOLS.from_formula(
    'mys ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
    data=final_df_index,
).fit(**two_way_cluster)
print(model4_robust_entity_time)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                    mys   R-squared:                        0.3070
Estimator:                   PanelOLS   R-squared (Between):             -0.0209
No. Observations:                 162   R-squared (Within):              -0.2977
Date:                Thu, Jun 13 2024   R-squared (Overall):             -0.0209
Time:                        10:31:38   Log-likelihood                    221.12
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      7.7853
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             4.0875
                                        P-value                           0.0005
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        0.0032     0.0060     0.5321     0.5956     -0.0087      0.0151
desi_idt         0.0153     0.0188     0.8143     0.4170     -0.0219      0.0524
desi_dps        -0.0511     0.0322    -1.5878     0.1149     -0.1148      0.0126
desi_hc          0.0280     0.0290     0.9649     0.3365     -0.0294      0.0854
health          -0.0104     0.0177    -0.5897     0.5565     -0.0454      0.0245
unemployment    -0.0451     0.0137    -3.2802     0.0013     -0.0723     -0.0179
education        0.1039     0.0495     2.0984     0.0379      0.0059      0.2019
================================================================================

F-test for Poolability: 637.25
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [55]:
# Side-by-side summary of the three `mys` fits; precision='pvalues' makes the
# table report p-values in parentheses under each coefficient.
print(
    compare(
        {
            "FE-model": model4,
            "Robust-model(entity)": model4_robust_entity,
            "Robust-model(entity-time)": model4_robust_entity_time,
        },
        precision='pvalues',
    )
)
                                   Model Comparison                                   
======================================================================================
                               FE-model Robust-model(entity) Robust-model(entity-time)
--------------------------------------------------------------------------------------
Dep. Variable                       mys                  mys                       mys
Estimator                      PanelOLS             PanelOLS                  PanelOLS
No. Observations                    162                  162                       162
Cov. Est.                    Unadjusted            Clustered                 Clustered
R-squared                        0.3070               0.3070                    0.3070
R-Squared (Within)              -0.2977              -0.2977                   -0.2977
R-Squared (Between)             -0.0209              -0.0209                   -0.0209
R-Squared (Overall)             -0.0209              -0.0209                   -0.0209
F-statistic                      7.7853               7.7853                    7.7853
P-value (F-stat)                 0.0000               0.0000                    0.0000
=====================     =============          ===========               ===========
desi_conn                        0.0032               0.0032                    0.0032
                               (0.6243)             (0.6494)                  (0.5956)
desi_idt                         0.0153               0.0153                    0.0153
                               (0.2804)             (0.4961)                  (0.4170)
desi_dps                        -0.0511              -0.0511                   -0.0511
                               (0.0392)             (0.1621)                  (0.1149)
desi_hc                          0.0280               0.0280                    0.0280
                               (0.3224)             (0.4194)                  (0.3365)
health                          -0.0104              -0.0104                   -0.0104
                               (0.5385)             (0.5610)                  (0.5565)
unemployment                    -0.0451              -0.0451                   -0.0451
                            (1.837e-07)             (0.0062)                  (0.0013)
education                        0.1039               0.1039                    0.1039
                               (0.0025)             (0.0899)                  (0.0379)
======================= ===============        =============             =============
Effects                          Entity               Entity                    Entity
                                   Time                 Time                      Time
--------------------------------------------------------------------------------------

P-values reported in parentheses
In [56]:
# Run check_groups once per DESI component of model4: each component takes a
# turn as the variable under test while the remaining three (in their original
# order) are passed as the other variables.
target = 'mys'
for var_to_check in ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']:
    other_vars = [
        name
        for name in ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
        if name != var_to_check
    ]
    check_groups(model4, var_to_check, target, other_vars)
 desi_conn 
Manual calculation: 0.0031911750019771906
Model coeff: 0.0031911750019773316
 desi_idt 
Manual calculation: 0.015272599852596696
Model coeff: 0.015272599852596418
 desi_dps 
Manual calculation: -0.05109764047758645
Model coeff: -0.05109764047758676
 desi_hc 
Manual calculation: 0.02798003855050323
Model coeff: 0.0279800385505033
In [57]:
# Entity- and time-effects regression of `gnipc` on the four DESI components
# plus the health, unemployment and education controls, fitted with the
# default covariance settings.
model5 = (
    PanelOLS
    .from_formula(
        'gnipc ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
        data=final_df_index,
    )
    .fit()
)
print(model5)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                  gnipc   R-squared:                        0.2930
Estimator:                   PanelOLS   R-squared (Between):              0.1479
No. Observations:                 162   R-squared (Within):               0.1735
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.1480
Time:                        10:31:40   Log-likelihood                   -1412.0
Cov. Estimator:            Unadjusted                                           
                                        F-statistic:                      7.2835
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             7.2835
                                        P-value                           0.0000
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        262.41     155.20     1.6907     0.0934     -44.812      569.62
desi_idt        -1375.0     336.39    -4.0874     0.0001     -2040.9     -709.11
desi_dps         2497.9     585.38     4.2672     0.0000      1339.2      3656.7
desi_hc         -363.02     672.57    -0.5398     0.5903     -1694.3      968.29
health          -1149.4     403.16    -2.8510     0.0051     -1947.5     -351.37
unemployment     107.87     194.79     0.5538     0.5807     -277.70      493.44
education       -2770.6     802.56    -3.4522     0.0008     -4359.2     -1182.0
================================================================================

F-test for Poolability: 152.39
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [58]:
# Same `gnipc` specification as model5, refitted with standard errors
# clustered by entity.
model5_robust_entity = (
    PanelOLS
    .from_formula(
        'gnipc ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
        data=final_df_index,
    )
    .fit(cluster_entity=True, cov_type="clustered")
)
print(model5_robust_entity)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                  gnipc   R-squared:                        0.2930
Estimator:                   PanelOLS   R-squared (Between):              0.1479
No. Observations:                 162   R-squared (Within):               0.1735
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.1480
Time:                        10:31:40   Log-likelihood                   -1412.0
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      7.2835
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):             1.1888
                                        P-value                           0.3141
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        262.41     311.50     0.8424     0.4012     -354.20      879.01
desi_idt        -1375.0     929.79    -1.4788     0.1417     -3215.4      465.48
desi_dps         2497.9     1802.3     1.3860     0.1683     -1069.5      6065.4
desi_hc         -363.02     563.66    -0.6440     0.5207     -1478.7      752.70
health          -1149.4     609.81    -1.8849     0.0618     -2356.5      57.665
unemployment     107.87     204.50     0.5275     0.5988     -296.92      512.66
education       -2770.6     1648.0    -1.6812     0.0953     -6032.8      491.59
================================================================================

F-test for Poolability: 152.39
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [59]:
# Same `gnipc` specification as model5, refitted with standard errors
# clustered by both entity and time period.
# NOTE(review): in the recorded run the robust F-statistic of this fit is
# negative (-2.4624, p-value 1.0000), which a valid covariance estimate cannot
# produce. Presumably the two-way clustered covariance is not positive
# semi-definite with only 6 time periods - verify before relying on these
# standard errors.
model5_robust_entity_time = (
    PanelOLS
    .from_formula(
        'gnipc ~ desi_conn + desi_idt + desi_dps + desi_hc + EntityEffects + TimeEffects + health + unemployment + education',
        data=final_df_index,
    )
    .fit(cluster_time=True, cluster_entity=True, cov_type="clustered")
)
print(model5_robust_entity_time)
                          PanelOLS Estimation Summary                           
================================================================================
Dep. Variable:                  gnipc   R-squared:                        0.2930
Estimator:                   PanelOLS   R-squared (Between):              0.1479
No. Observations:                 162   R-squared (Within):               0.1735
Date:                Thu, Jun 13 2024   R-squared (Overall):              0.1480
Time:                        10:31:40   Log-likelihood                   -1412.0
Cov. Estimator:             Clustered                                           
                                        F-statistic:                      7.2835
Entities:                          27   P-value                           0.0000
Avg Obs:                       6.0000   Distribution:                   F(7,123)
Min Obs:                       6.0000                                           
Max Obs:                       6.0000   F-statistic (robust):            -2.4624
                                        P-value                           1.0000
Time periods:                       6   Distribution:                   F(7,123)
Avg Obs:                       27.000                                           
Min Obs:                       27.000                                           
Max Obs:                       27.000                                           
                                                                                
                              Parameter Estimates                               
================================================================================
              Parameter  Std. Err.     T-stat    P-value    Lower CI    Upper CI
--------------------------------------------------------------------------------
desi_conn        262.41     283.69     0.9250     0.3568     -299.14      823.95
desi_idt        -1375.0     787.76    -1.7454     0.0834     -2934.3      184.34
desi_dps         2497.9     1352.6     1.8468     0.0672     -179.42      5175.3
desi_hc         -363.02     414.05    -0.8768     0.3823     -1182.6      456.56
health          -1149.4     530.17    -2.1680     0.0321     -2198.8     -99.976
unemployment     107.87     140.81     0.7661     0.4451     -170.85      386.59
education       -2770.6     1493.9    -1.8546     0.0661     -5727.8      186.55
================================================================================

F-test for Poolability: 152.39
P-value: 0.0000
Distribution: F(31,123)

Included effects: Entity, Time
In [60]:
# Side-by-side summary of the three `gnipc` fits; precision='pvalues' makes the
# table report p-values in parentheses under each coefficient.
print(
    compare(
        {
            "FE-model": model5,
            "Robust-model(entity)": model5_robust_entity,
            "Robust-model(entity-time)": model5_robust_entity_time,
        },
        precision='pvalues',
    )
)
                                   Model Comparison                                   
======================================================================================
                               FE-model Robust-model(entity) Robust-model(entity-time)
--------------------------------------------------------------------------------------
Dep. Variable                     gnipc                gnipc                     gnipc
Estimator                      PanelOLS             PanelOLS                  PanelOLS
No. Observations                    162                  162                       162
Cov. Est.                    Unadjusted            Clustered                 Clustered
R-squared                        0.2930               0.2930                    0.2930
R-Squared (Within)               0.1735               0.1735                    0.1735
R-Squared (Between)              0.1479               0.1479                    0.1479
R-Squared (Overall)              0.1480               0.1480                    0.1480
F-statistic                      7.2835               7.2835                    7.2835
P-value (F-stat)                 0.0000               0.0000                    0.0000
=====================     =============          ===========               ===========
desi_conn                        262.41               262.41                    262.41
                               (0.0934)             (0.4012)                  (0.3568)
desi_idt                        -1375.0              -1375.0                   -1375.0
                            (7.815e-05)             (0.1417)                  (0.0834)
desi_dps                         2497.9               2497.9                    2497.9
                            (3.913e-05)             (0.1683)                  (0.0672)
desi_hc                         -363.02              -363.02                   -363.02
                               (0.5903)             (0.5207)                  (0.3823)
health                          -1149.4              -1149.4                   -1149.4
                               (0.0051)             (0.0618)                  (0.0321)
unemployment                     107.87               107.87                    107.87
                               (0.5807)             (0.5988)                  (0.4451)
education                       -2770.6              -2770.6                   -2770.6
                               (0.0008)             (0.0953)                  (0.0661)
======================= ===============        =============             =============
Effects                          Entity               Entity                    Entity
                                   Time                 Time                      Time
--------------------------------------------------------------------------------------

P-values reported in parentheses
In [61]:
# Run check_groups once per DESI component of model5: each component takes a
# turn as the variable under test while the remaining three (in their original
# order) are passed as the other variables.
target = 'gnipc'
for var_to_check in ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']:
    other_vars = [
        name
        for name in ['desi_conn', 'desi_idt', 'desi_dps', 'desi_hc']
        if name != var_to_check
    ]
    check_groups(model5, var_to_check, target, other_vars)
 desi_conn 
Manual calculation: 262.4061188607826
Model coeff: 262.4061188607813
 desi_idt 
Manual calculation: -1374.980072271474
Model coeff: -1374.9800722714735
 desi_dps 
Manual calculation: 2497.9494924253463
Model coeff: 2497.9494924253477
 desi_hc 
Manual calculation: -363.022081003925
Model coeff: -363.0220810039283

Оценка модели HDI и I-desi¶

In [62]:
# Variance inflation factor for every column of the design matrix built from
# the HDI formula. The design matrix includes an Intercept column, so it
# appears as the first row of the resulting table.
# NOTE(review): this cell reads `final_df`, whereas the panel models are fitted
# on `final_df_index` - confirm both hold the same observations.
y, X = dmatrices(
    'hdi ~ desi_conn + desi_idt + desi_dps + desi_hc + unemployment + education + health',
    data=final_df,
    return_type='dataframe',
)
vif = pd.DataFrame({
    'VIF': [variance_inflation_factor(X.values, col_idx) for col_idx in range(X.shape[1])],
    'variable': X.columns,
})
vif
Out[62]:
VIF variable
0 72.768942 Intercept
1 1.891819 desi_conn
2 4.180753 desi_idt
3 3.829759 desi_dps
4 3.621648 desi_hc
5 1.085186 unemployment
6 1.448257 education
7 1.513656 health
In [ ]:
 
In [ ]:
 
In [ ]: